# Appendix
library(tidyverse)
library(xtable)
library(stargazer)
library(monomvn)
library(ggthemes)
library(broom)
library(dplyr)
library(tidyr)
library(scales)

##########################################################
# D. Survey Demographics
##########################################################

source('Code/merge_waves_marijuana.R')
source('Code/merge_waves_tech.R')

# Process Experiment 1 (Marijuana) data
# Starts from `df` as left in the workspace by the sourced merge scripts and
# turns the numeric survey codes into labelled columns used by the tables below.
mar <- df %>%
  mutate(
    # Values containing a comma are pooled into a single category before the
    # remaining codes are labelled (presumably multi-select answers -- confirm).
    racew1 = replace(racew1, grepl(",", racew1), "Multi-Racial"),
    racew1 = recode(
      racew1,
      "1" = "Black",
      "2" = "Asian and Pacific Islander",
      "3" = "Non-Hispanic White",
      "4" = "Hispanic",
      "5" = "Native American",
      "6" = "Other",
      "7" = "Other"
    ),
    genderw1 = factor(
      recode(genderw1, "1" = "Male", "2" = "Female"),
      levels = c("Male", "Female", "Other")
    ),
    eduw1 = factor(
      recode(
        educw1,
        "1" = "Did Not Graduate High School",
        "2" = "High School Graduate",
        "3" = "Some College",
        "4" = "2 Year Degree",
        "5" = "4 Year Degree",
        "6" = "Post Graduate Degree"
      ),
      levels = c(
        "Did Not Graduate High School", "High School Graduate", "Some College",
        "2 Year Degree", "4 Year Degree", "Post Graduate Degree"
      )
    ),
    pid3w1 = recode(
      party1w1,
      "1" = "Democrat",
      "2" = "Republican",
      "3" = "Independent/ Third Party",
      "4" = "Independent/ Third Party"
    ),
    # Three-way party ID with leaners: party1w1 codes 1 and 2 are taken
    # directly; codes 3 and 4 are assigned from the follow-up item party4w1.
    # Any other combination (including missing answers) stays NA.
    pid3_leanerw1 = factor(
      case_when(
        party1w1 == 1 ~ "Democrat",
        party1w1 == 2 ~ "Republican",
        party1w1 %in% c(3, 4) & party4w1 == 1 ~ "Republican",
        party1w1 %in% c(3, 4) & party4w1 == 2 ~ "Democrat",
        party1w1 %in% c(3, 4) & party4w1 == 3 ~ "Independent"
      ),
      levels = c("Democrat", "Republican", "Independent")
    )
  )


# Process Experiment 2 and 3 (Tech) data
#' Collapse the wave-1 race indicator columns into one label per respondent.
#'
#' Labels are assigned in a fixed order, so a later indicator overrides an
#' earlier one; rows with more than one indicator set end up as
#' "Multi-Racial". Rows with no indicator set (or with a missing indicator
#' where it matters) stay NA.
#'
#' @param race_df Data frame containing the columns `whitew1`, `blackw1`,
#'   `hispanicw1`, `asianw1`, `native.americanw1`, `middle.easternw1` and
#'   `otherw1`. Indicators may be logical or 0/1 numeric; they are coerced
#'   with `as.logical()` so that numeric flags are never mistaken for row
#'   positions.
#' @return A character vector with one element per row of `race_df`
#'   (`character(0)` for a zero-row input).
get_race_vec <- function(race_df) {
  # Indicator column -> label, in assignment order (later entries win).
  single_labels <- c(
    whitew1 = "Non-Hispanic White",
    blackw1 = "Black",
    hispanicw1 = "Hispanic",
    asianw1 = "Asian and Pacific Islander",
    native.americanw1 = "Native American",
    middle.easternw1 = "Other",
    otherw1 = "Other"
  )
  flags <- lapply(race_df[names(single_labels)], as.logical)

  race <- rep(NA_character_, nrow(race_df))
  for (col in names(single_labels)) {
    # which() drops NA flags, so missing indicators never assign a label.
    race[which(flags[[col]])] <- single_labels[[col]]
  }

  # Number of indicators set per row; NA if any indicator is missing, which
  # leaves the single-category label (if any) in place.
  n_selected <- Reduce(`+`, flags)
  race[which(n_selected > 1)] <- "Multi-Racial"
  race
}

#' Harmonise the demographic columns of a tech-experiment data set.
#'
#' Adds `racew1` (via get_race_vec()), labels and orders `eduw1`, turns
#' `genderw1` into a factor, and folds the "Independent" and "Other Party"
#' values of `pid3w1` into a single category.
#'
#' @param df Data frame holding the wave-1 race indicator columns plus
#'   `eduw1`, `genderw1` and `pid3w1`.
#' @return `df` with the columns above replaced or added.
process_tech_data <- function(df) {
  edu_labels <- c(
    "1" = "Did Not Graduate High School",
    "2" = "High School Graduate",
    "3" = "Some College",
    "4" = "2 Year Degree",
    "5" = "4 Year Degree",
    "6" = "Post Graduate Degree"
  )

  df$racew1 <- get_race_vec(df)
  df$eduw1 <- factor(recode(df$eduw1, !!!edu_labels), levels = unname(edu_labels))
  df$genderw1 <- factor(df$genderw1, levels = c("Male", "Female", "Other"))
  df$pid3w1[df$pid3w1 %in% c("Independent", "Other Party")] <- "Independent/ Third Party"
  df
}

# Apply the same processing to the text and video experiment data.
text <- process_tech_data(text)
vid <- process_tech_data(vid)

#' Side-by-side category shares of one variable across the three experiments.
#'
#' @param var1,var2,var3 The variable as observed in Experiments 1, 2 and 3.
#'   The per-experiment share vectors are combined with cbind(), so they are
#'   expected to have the same categories in the same order (typically factors
#'   sharing their levels).
#' @param var_name Name given to the leading column holding the category
#'   labels.
#' @return A data frame with the category column followed by 'Experiment 1',
#'   'Experiment 2' and 'Experiment 3', each holding shares rounded to two
#'   decimals.
create_summary_table <- function(var1, var2, var3, var_name) {
  shares <- lapply(
    list(var1, var2, var3),
    function(vec) round(prop.table(table(vec)), 2)
  )

  share_matrix <- do.call(cbind, shares)
  colnames(share_matrix) <- c('Experiment 1', 'Experiment 2', 'Experiment 3')

  tibble::rownames_to_column(as.data.frame(share_matrix), var = var_name)
}


#' Covariate balance table: demographic shares by treatment arm.
#'
#' For education, gender, party (with leaners) and race, computes the share of
#' each category within every treatment arm and in the pooled sample, rounded
#' to two decimals. Rows whose value is not one of the listed categories are
#' left out of that demographic's shares.
#'
#' @param data Data frame with `eduw1`, `genderw1`, `pid3_leanerw1`, `racew1`
#'   and the treatment column.
#' @param treat_var Name (string) of the treatment column; its values are
#'   matched against "Control", "Fox" and "MSNBC".
#' @return A tibble with columns Demographic, Category, Control, Fox, MSNBC and
#'   Total. The Demographic label appears only on the first row of each block.
create_balance_table <- function(data, treat_var) {
  data[[treat_var]] <- factor(data[[treat_var]], levels = c("Control", "Fox", "MSNBC"))
  arm <- sym(treat_var)

  # Table label -> source column and the display order of its categories.
  demographics <- list(
    Education = list(
      column = "eduw1",
      levels = c("Did Not Graduate High School", "High School Graduate", "Some College",
                 "2 Year Degree", "4 Year Degree", "Post Graduate Degree")
    ),
    Gender = list(
      column = "genderw1",
      levels = c("Male", "Female", "Other")
    ),
    Party = list(
      column = "pid3_leanerw1",
      levels = c("Democrat", "Republican", "Independent")
    ),
    Race = list(
      column = "racew1",
      levels = c("Non-Hispanic White", "Black", "Hispanic", "Asian and Pacific Islander",
                 "Native American", "Multi-Racial", "Other")
    )
  )

  # Shares of one demographic: one row per category, one column per arm, plus
  # the pooled share in `Total`.
  balance_for <- function(demo_col, demo_levels) {
    demo <- sym(demo_col)
    in_scope <- filter(data, !!demo %in% demo_levels)

    by_arm <- in_scope %>%
      count(!!arm, !!demo) %>%
      group_by(!!arm) %>%
      mutate(prop = round(n / sum(n), 2)) %>%
      ungroup() %>%
      pivot_wider(
        id_cols = all_of(demo_col),
        names_from = all_of(treat_var),
        values_from = prop,
        values_fill = list(prop = 0)
      ) %>%
      mutate(!!demo_col := factor(!!demo, levels = demo_levels)) %>%
      arrange(!!demo) %>%
      rename(Category = all_of(demo_col))

    pooled <- in_scope %>%
      count(!!demo) %>%
      mutate(
        Total = round(n / sum(n), 2),
        !!demo_col := factor(!!demo, levels = demo_levels)
      ) %>%
      arrange(!!demo) %>%
      rename(Category = all_of(demo_col)) %>%
      dplyr::select(Category, Total)

    left_join(by_arm, pooled, by = "Category")
  }

  blocks <- lapply(names(demographics), function(label) {
    spec <- demographics[[label]]
    mutate(balance_for(spec$column, spec$levels), Demographic = label)
  })
  balance_table <- bind_rows(blocks)

  # An arm with no observations has no column after the pivot; add it as 0 so
  # every table has the same layout.
  cols_needed <- c("Demographic", "Category", "Control", "Fox", "MSNBC", "Total")
  for (absent in setdiff(cols_needed, names(balance_table))) {
    balance_table[[absent]] <- 0
  }
  balance_table <- balance_table[, cols_needed]

  # Keep the demographic label only on the first row of each block.
  mutate(
    balance_table,
    Demographic = if_else(duplicated(Demographic), "", Demographic)
  )
}

# --- Experiment 1 ---
# Rows with forcedchoicew1 == 1 and forcedchoicew2 == 0; the arm is Fox or
# MSNBC when the matching indicator equals 1, otherwise Control.
mar_forced <- filter(mar, forcedchoicew1 == 1 & forcedchoicew2 == 0)
mar_forced <- mutate(
  mar_forced,
  treatment = case_when(
    foxw1 == 1 ~ "Fox",
    msnbcw1 == 1 ~ "MSNBC",
    TRUE ~ "Control"
  )
)

balance_exp1 <- create_balance_table(mar_forced, "treatment")

# --- Experiment 2 ---
# Relabel the arms so all three experiments share Control / Fox / MSNBC.
text$treatmentw1 <- recode(
  text$treatmentw1,
  anti = "Fox",
  pro = "MSNBC",
  placebo = "Control"
)

balance_exp2 <- create_balance_table(text, "treatmentw1")

# --- Experiment 3 ---
vid$treatmentw1 <- recode(
  vid$treatmentw1,
  anti = "Fox",
  pro = "MSNBC",
  placebo = "Control"
)

balance_exp3 <- create_balance_table(vid, "treatmentw1")

# Write the three balance tables to LaTeX, one file per experiment.
invisible(Map(
  function(balance, caption, label, path) {
    print(
      xtable(balance, caption = caption, label = label),
      file = path,
      include.rownames = FALSE
    )
  },
  list(balance_exp1, balance_exp2, balance_exp3),
  c(
    "Balance Table for Experiment 1",
    "Balance Table for Experiment 2",
    "Balance Table for Experiment 3"
  ),
  c("tab:exp1_balance", "tab:exp2_balance", "tab:exp3_balance"),
  c(
    "Output/tableA4_exp1.tex",
    "Output/tableA5_exp2.tex",
    "Output/tableA6_exp3.tex"
  )
))


##########################################################
# H. Treatment effects for individual questions
##########################################################

### Experiment 1

# Experiment 1 sample for the per-question models: rows of `df` with
# forcedchoicew1 == 1 and forcedchoicew2 == 0.
df_forced <- df %>% filter(forcedchoicew1 == 1, forcedchoicew2==0)

# Outcome stems; the wave suffix ("w1" / "w2") is appended when the model
# formulas are built in create_and_print_table().
outcomes <- c("mar_tradeoff", "mar_econ", "mar_costmore", "mar_fewserious",
              "mar_wrong", "mar_violence", "mar_legmed", "mar_serious",
              "mar_legrec", "danger_mar")

# Descriptions paired with `outcomes` by position; each is inserted into the
# title of the corresponding table.
outcome_labels <- c("whether drug use is a health problem v criminal issue", 
                    "whether marijuana legalization makes the economy better", 
                    "whether government efforts to enforce marijuana laws cost more than they are worth", 
                    "whether the legalization of marijuana leads to fewer people using more serious drugs, such as heroin and cocaine",
                    "whether marijuana use is morally wrong", 
                    "whether marijuana use increases violent crime", 
                    "whether marijuana should be legal for medical use",
                    "whether marijuana use is a serious problem today",
                    "whether marijuana should be legal for recreational use", 
                    "how dangerous is marijuana")

# Regress the wave-1 and wave-2 versions of one outcome on the Fox and MSNBC
# indicators (data: df_forced) and write both models to
# Output/Individual_questions/<outcome>.tex as one LaTeX table.
#   outcome       - outcome stem, e.g. "mar_econ"
#   outcome_label - description appended to the table title
# The stargazer() call sits inside capture.output(), so the LaTeX is collected
# as the function's return value rather than printed.
create_and_print_table <- function(outcome, outcome_label) {
  model_w1 <- lm(as.formula(paste0(outcome, "w1 ~ foxw1 + msnbcw1")), data = df_forced)
  model_w2 <- lm(as.formula(paste0(outcome, "w2 ~ foxw1 + msnbcw1")), data = df_forced)
  
  capture.output(
    stargazer(model_w1, model_w2,
              type = "latex",
              out = paste0("Output/Individual_questions/", outcome, ".tex"),
              title = paste("Effect of treatment on", outcome_label),
              label = paste0("tab:", outcome),
              column.labels = c("Wave 1", "Wave 2"),
              model.names = FALSE,
              dep.var.labels = NULL,
              covariate.labels = c("Fox", "MSNBC"),
              # NOTE(review): omit.stat and keep.stat (below) are both given;
              # confirm which one stargazer honours.
              omit.stat = c("f", "ser"),
              star.cutoffs = c(0.1, 0.05, 0.01),
              notes = "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01",
              notes.append = FALSE,
              header = FALSE,
              omit = "Constant",
              dep.var.labels.include = FALSE,
              keep.stat = c("n", "rsq", "adj.rsq"), 
              omit.table.layout = "m"
    )
  )
}
# One table per outcome; walk2() is used for the side effect (the .tex files).
walk2(outcomes, outcome_labels, create_and_print_table)


### Experiment 2

# Outcome stems for Experiment 2 (text); the wave suffix ("w1" / "w2") is
# appended when the model formulas are built below.
outcomes <- c("scale", "censorship", "privacy", "congress", "influence", "fav_tech")
# Descriptions paired with `outcomes` by position; used in the table titles.
outcome_labels <- c("whether the size of big tech is good for consumers",
                    "whether social networks should remove more false, offensive, misleading, and harmful content", 
                    "whether big tech does a good job of keeping user info secure", 
                    "whether congress should do more to regulate how big tech gather data", 
                    "whether big tech companies exert too much influence over the political life in america",
                    "favorability of big tech companies")

# Regress the wave-1 and wave-2 versions of one outcome on treatmentw1
# (data: text) and write both models to
# Output/Individual_questions/<outcome>_text.tex as one LaTeX table.
#   outcome       - outcome stem, e.g. "privacy"
#   outcome_label - description appended to the table title
# NOTE(review): covariate.labels assumes treatmentw1 was already relabelled to
# Control / Fox / MSNBC earlier in the script, so that the two estimated
# contrasts are Fox and MSNBC (in that order) against Control -- confirm.
create_and_print_table <- function(outcome, outcome_label) {
  model_w1 <- lm(as.formula(paste0(outcome, "w1 ~ treatmentw1")), data = text)
  model_w2 <- lm(as.formula(paste0(outcome, "w2 ~ treatmentw1")), data = text)
  # capture.output() collects the LaTeX that stargazer() would otherwise print.
  capture.output(
    stargazer(model_w1, model_w2,
              type = "latex",
              out = paste0("Output/Individual_questions/", outcome, "_text.tex"),
              title = paste("Effect of treatment on", outcome_label),
              label = paste0("tab:text_", outcome),
              column.labels = c("Wave 1", "Wave 2"),
              model.names = FALSE,
              dep.var.labels = NULL,
              covariate.labels = c("Fox", "MSNBC"),
              omit.stat = c("f", "ser"),
              star.cutoffs = c(0.1, 0.05, 0.01),
              notes = "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01",
              notes.append = FALSE,
              dep.var.labels.include = FALSE,
              header = FALSE,
              omit = "Constant",
              keep.stat = c("n", "rsq", "adj.rsq")
    )
  )
}
# One table per outcome; walk2() is used for the side effect (the .tex files).
walk2(outcomes, outcome_labels, create_and_print_table)

### Experiment 3

# Outcome stems for Experiment 3 (video); the wave suffix ("w1" / "w2") is
# appended when the model formulas are built below.
outcomes <- c("scale", "censorship", "privacy", "congress", "influence", "fav_tech")
# Descriptions paired with `outcomes` by position; used in the table titles.
outcome_labels <- c("whether the size of big tech is good for consumers",
                    "whether social networks should remove more false, offensive, misleading, and harmful content",
                    "whether big tech does a good job of keeping user info secure",
                    "whether congress should do more to regulate how big tech gather data",
                    "whether big tech companies exert too much influence over the political life in america",
                    "favorability of big tech companies")

# Regress the wave-1 and wave-2 versions of one outcome on treatmentw1
# (data: vid) and write both models to
# Output/Individual_questions/<outcome>_vid.tex as one LaTeX table.
#   outcome       - outcome stem, e.g. "privacy"
#   outcome_label - description appended to the table title
# The LaTeX label is "tab:vid_<outcome>" so it does not collide with the
# "tab:text_<outcome>" labels of the Experiment 2 tables when both sets are
# included in the same document.
# NOTE(review): covariate.labels assumes treatmentw1 was already relabelled to
# Control / Fox / MSNBC earlier in the script -- confirm.
create_and_print_table <- function(outcome, outcome_label) {
  model_w1 <- lm(as.formula(paste0(outcome, "w1 ~ treatmentw1")), data = vid)
  model_w2 <- lm(as.formula(paste0(outcome, "w2 ~ treatmentw1")), data = vid)
  # capture.output() collects the LaTeX that stargazer() would otherwise print.
  capture.output(
    stargazer(model_w1, model_w2,
              type = "latex",
              out = paste0("Output/Individual_questions/", outcome, "_vid.tex"),
              title = paste("Effect of treatment on", outcome_label),
              label = paste0("tab:vid_", outcome),
              column.labels = c("Wave 1", "Wave 2"),
              model.names = FALSE,
              dep.var.labels = NULL,
              covariate.labels = c("Fox", "MSNBC"),
              omit.stat = c("f", "ser"),
              star.cutoffs = c(0.1, 0.05, 0.01),
              notes = "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01",
              notes.append = FALSE,
              dep.var.labels.include = FALSE,
              header = FALSE,
              omit = "Constant",
              keep.stat = c("n", "rsq", "adj.rsq")
    )
  )
}
# One table per outcome; walk2() is used for the side effect (the .tex files).
walk2(outcomes, outcome_labels, create_and_print_table)



##########################################################
# E. Attrition Rates across Experiments 1–3
##########################################################

# --- Attrition: Experiment 1 (Marijuana) ---
# One row per respondent with forcedchoicew1 == 1: treatment arm plus a flag
# per wave that is TRUE when that wave's marker column (PIDw1, Progressw2,
# Progressw3) is non-missing.
exp1_attr <- mar %>%
  filter(forcedchoicew1 == 1) %>%
  mutate(
    Experiment = 1,
    treatment = case_when(
      foxw1 == 1 ~ "Fox",
      msnbcw1 == 1 ~ "MSNBC",
      TRUE ~ "Control"
    ),
    resp_w1 = !is.na(PIDw1),
    resp_w2 = !is.na(Progressw2),
    resp_w3 = !is.na(Progressw3)
  ) %>%
  dplyr::select(Experiment, treatment, resp_w1, resp_w2, resp_w3)


# --- Attrition: Experiment 2 (Text) ---
# Arms may arrive either already relabelled (Control / Fox / MSNBC) or in the
# raw coding (placebo / anti / pro); anything else becomes NA through the
# factor levels. There is no third wave, so resp_w3 is NA throughout.
exp2_attr <- text %>%
  transmute(
    Experiment = 2,
    treatment = factor(
      recode(treatmentw1, placebo = "Control", anti = "Fox", pro = "MSNBC"),
      levels = c("Control", "Fox", "MSNBC")
    ),
    resp_w1 = !is.na(EndDatew1),
    resp_w2 = !is.na(EndDatew2),
    resp_w3 = NA_real_
  )


# --- Attrition: Experiment 3 (Video) ---
# Arms may arrive either already relabelled (Control / Fox / MSNBC) or in the
# raw coding (placebo / anti / pro); anything else becomes NA through the
# factor levels. There is no third wave, so resp_w3 is NA throughout.
exp3_attr <- vid %>%
  transmute(
    Experiment = 3,
    treatment = factor(
      recode(treatmentw1, placebo = "Control", anti = "Fox", pro = "MSNBC"),
      levels = c("Control", "Fox", "MSNBC")
    ),
    resp_w1 = !is.na(EndDatew1),
    resp_w2 = !is.na(EndDatew2),
    resp_w3 = NA_real_
  )


# Respondent counts per experiment, arm and wave, plus the attrition rate
# between wave 1 and the last wave (wave 3 for Experiment 1, wave 2 otherwise).
attrition_all <- local({
  # Count respondents per experiment x arm; rows without an arm are dropped.
  wave_counts <- bind_rows(exp1_attr, exp2_attr, exp3_attr) %>%
    filter(!is.na(treatment)) %>%
    group_by(Experiment, treatment) %>%
    summarise(
      `$N_{wave1}$` = sum(resp_w1, na.rm = TRUE),
      `$N_{wave2}$` = sum(resp_w2, na.rm = TRUE),
      # Only Experiment 1 has a third wave.
      `$N_{wave3}$` = if_else(
        first(Experiment) == 1,
        sum(resp_w3, na.rm = TRUE),
        NA_real_
      ),
      .groups = "drop"
    )

  count_cols <- c("Experiment", "$N_{wave1}$", "$N_{wave2}$", "$N_{wave3}$")

  wave_counts %>%
    mutate(
      `Attrition Rate` = if_else(
        Experiment == 1,
        round((`$N_{wave1}$` - `$N_{wave3}$`) / `$N_{wave1}$`, 2),
        round((`$N_{wave1}$` - `$N_{wave2}$`) / `$N_{wave1}$`, 2)
      )
    ) %>%
    arrange(Experiment, treatment) %>%
    rename(Treatment = treatment) %>%
    distinct() %>%
    mutate(across(all_of(count_cols), as.integer))
})

# Write the attrition table to LaTeX.
# The count columns are named with LaTeX math (e.g. `$N_{wave1}$`). By default
# print.xtable() escapes `$`, `_`, `{` and `}` in column names, which would
# typeset the headers as literal text; the column names are therefore passed
# through unchanged. None of the other headers contain special characters.
print(
  xtable(
    attrition_all,
    caption = "Sample Sizes and Attrition Rates by Experiment and Treatment",
    label   = "tab:all_attrition",
    digits  = c(0, 0, 0, 0, 0, 0, 2)
  ),
  file                       = "Output/tableA7_All_Attrition.tex",
  include.rownames           = FALSE,
  sanitize.colnames.function = identity
)




##########################################################
# E. Baseline Demographics by Wave Participation
#    for Experiments 1, 2, and 3
##########################################################

#' Share of each category of one demographic within each survey wave.
#'
#' @param df Data frame with a `wave` column and the demographic column.
#' @param var Name (string) of the demographic column.
#' @param levels Category values to keep; rows with any other value are
#'   dropped before the shares are computed.
#' @return A tibble with a `Category` column and one column per wave holding
#'   the within-wave share rounded to two decimals (0 where a category does
#'   not occur in a wave).
make_wave_balance <- function(df, var, levels) {
  shares <- df %>%
    dplyr::filter(.data[[var]] %in% levels) %>%
    dplyr::count(wave, .data[[var]]) %>%
    dplyr::group_by(wave) %>%
    dplyr::mutate(prop = round(n / sum(n), 2)) %>%
    dplyr::ungroup()

  shares %>%
    tidyr::pivot_wider(
      id_cols     = dplyr::all_of(var),
      names_from  = wave,
      values_from = prop,
      values_fill = list(prop = 0)
    ) %>%
    dplyr::rename(Category = dplyr::all_of(var))
}

# Categories reported in the by-wave demographic tables.
educ_lvls <- c(
  "Did Not Graduate High School", "High School Graduate", "Some College",
  "2 Year Degree", "4 Year Degree", "Post Graduate Degree"
)
gender_lvls <- c("Male", "Female")
party_lvls <- c("Democrat", "Republican", "Independent")
race_lvls <- c(
  "Non-Hispanic White", "Black", "Hispanic", "Asian and Pacific Islander",
  "Native American", "Multi-Racial", "Other"
)

# --- Experiment 1 (Marijuana) ---
# One data set per wave: respondents with forcedchoicew1 == 1 whose marker
# column for that wave is non-missing, carrying their wave-1 demographics.
wave1_df1 <- mar %>%
  filter(forcedchoicew1 == 1 & !is.na(PIDw1)) %>%
  mutate(wave = "Wave 1") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

wave2_df1 <- mar %>%
  filter(forcedchoicew1 == 1 & !is.na(Progressw2)) %>%
  mutate(wave = "Wave 2") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

wave3_df1 <- mar %>%
  filter(forcedchoicew1 == 1 & !is.na(Progressw3)) %>%
  mutate(wave = "Wave 3") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

balance_waves1 <- bind_rows(wave1_df1, wave2_df1, wave3_df1)
balance_waves1$wave <- factor(balance_waves1$wave, levels = c("Wave 1", "Wave 2", "Wave 3"))

edu_bal1 <- mutate(make_wave_balance(balance_waves1, "edu", educ_lvls), Demographic = "Education")
gender_bal1 <- mutate(make_wave_balance(balance_waves1, "gender", gender_lvls), Demographic = "Gender")
party_bal1 <- mutate(make_wave_balance(balance_waves1, "party", party_lvls), Demographic = "Party")
race_bal1 <- mutate(make_wave_balance(balance_waves1, "race", race_lvls), Demographic = "Race")

# Stack the four blocks and show each demographic label only on its first row.
baseline_demo_1 <- bind_rows(edu_bal1, gender_bal1, party_bal1, race_bal1) %>%
  dplyr::select(dplyr::all_of(c("Demographic", "Category", "Wave 1", "Wave 2", "Wave 3"))) %>%
  dplyr::mutate(Demographic = if_else(duplicated(Demographic), "", Demographic))

print(
  xtable(
    baseline_demo_1,
    caption = "Demographics by Wave in Experiment 1",
    label = "tab:baseline_demo_wave_exp1"
  ),
  file = "Output/tableA8_Baseline_Demo_Waves_Exp1.tex",
  include.rownames = FALSE
)


# --- Experiment 2 (Text) ---
# One data set per wave: respondents whose EndDate for that wave is
# non-missing, carrying their wave-1 demographics.
wave1_df2 <- text %>%
  filter(!is.na(EndDatew1)) %>%
  mutate(wave = "Wave 1") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

wave2_df2 <- text %>%
  filter(!is.na(EndDatew2)) %>%
  mutate(wave = "Wave 2") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

balance_waves2 <- bind_rows(wave1_df2, wave2_df2)
balance_waves2$wave <- factor(balance_waves2$wave, levels = c("Wave 1", "Wave 2"))

edu_bal2 <- mutate(make_wave_balance(balance_waves2, "edu", educ_lvls), Demographic = "Education")
gender_bal2 <- mutate(make_wave_balance(balance_waves2, "gender", gender_lvls), Demographic = "Gender")
party_bal2 <- mutate(make_wave_balance(balance_waves2, "party", party_lvls), Demographic = "Party")
race_bal2 <- mutate(make_wave_balance(balance_waves2, "race", race_lvls), Demographic = "Race")

# Stack the four blocks and show each demographic label only on its first row.
baseline_demo_2 <- bind_rows(edu_bal2, gender_bal2, party_bal2, race_bal2) %>%
  dplyr::select(dplyr::all_of(c("Demographic", "Category", "Wave 1", "Wave 2"))) %>%
  dplyr::mutate(Demographic = if_else(duplicated(Demographic), "", Demographic))

print(
  xtable(
    baseline_demo_2,
    caption = "Demographics by Wave in Experiment 2 (Text)",
    label = "tab:baseline_demo_wave_exp2"
  ),
  file = "Output/tableA9_Baseline_Demo_Waves_Exp2.tex",
  include.rownames = FALSE
)


# --- Experiment 3 (Video) ---
# One data set per wave: respondents whose EndDate for that wave is
# non-missing, carrying their wave-1 demographics.
wave1_df3 <- vid %>%
  filter(!is.na(EndDatew1)) %>%
  mutate(wave = "Wave 1") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

wave2_df3 <- vid %>%
  filter(!is.na(EndDatew2)) %>%
  mutate(wave = "Wave 2") %>%
  dplyr::select(wave, edu = eduw1, gender = genderw1, race = racew1, party = pid3_leanerw1)

balance_waves3 <- bind_rows(wave1_df3, wave2_df3)
balance_waves3$wave <- factor(balance_waves3$wave, levels = c("Wave 1", "Wave 2"))

edu_bal3 <- mutate(make_wave_balance(balance_waves3, "edu", educ_lvls), Demographic = "Education")
gender_bal3 <- mutate(make_wave_balance(balance_waves3, "gender", gender_lvls), Demographic = "Gender")
party_bal3 <- mutate(make_wave_balance(balance_waves3, "party", party_lvls), Demographic = "Party")
race_bal3 <- mutate(make_wave_balance(balance_waves3, "race", race_lvls), Demographic = "Race")

# Stack the four blocks and show each demographic label only on its first row.
baseline_demo_3 <- bind_rows(edu_bal3, gender_bal3, party_bal3, race_bal3) %>%
  dplyr::select(dplyr::all_of(c("Demographic", "Category", "Wave 1", "Wave 2"))) %>%
  dplyr::mutate(Demographic = if_else(duplicated(Demographic), "", Demographic))

print(
  xtable(
    baseline_demo_3,
    caption = "Demographics by Wave in Experiment 3 (Video)",
    label = "tab:baseline_demo_wave_exp3"
  ),
  file = "Output/tableA10_Baseline_Demo_Waves_Exp3.tex",
  include.rownames = FALSE
)


##########################################################
# F. Wave 2 Choice in Experiment 1
##########################################################
# Recoded survey files, one per wave.
w1 <- read_csv("Data/MediaSSI_Dec2017_w1_recoded.csv")
w2 <- read_csv("Data/MediaSSI_Dec2017_w2_recoded.csv")
w3 <- read_csv("Data/MediaSSI_Dec2017_w3_recoded.csv")

# Keep one row per PID: the row with the highest value in column `prog`.
# Rows where `prog` is missing are dropped first; among equal values the row
# that comes first in the input is kept.
keep_best <- function(df, prog) {
  df %>%
    filter(!is.na(.data[[prog]])) %>%
    arrange(PID, desc(.data[[prog]])) %>%
    distinct(PID, .keep_all = TRUE)
}

w1u <- keep_best(w1, "Progress")
w2u <- keep_best(w2, "Progress")
w3u <- keep_best(w3, "Progress")

# Attach waves 2 and 3 to the wave-1 rows. Wave-1 columns keep their names;
# later-wave columns that clash with an existing name get "_w2" / "_w3".
w123 <- left_join(w1u, w2u, by = "PID", suffix = c("", "_w2"))
w123 <- left_join(w123, w3u, by = "PID", suffix = c("", "_w3"))

# Respondents with forcedchoice == 1 in wave 1 and forcedchoice_w2 == 0,
# restricted to rows with a known wave-1 treatment and a wave-2 choice among
# the three outlets.
analysis_data <- w123 %>%
  filter(forcedchoice == 1, forcedchoice_w2 == 0) %>%
  mutate(
    wave1_treatment = case_when(
      fox == 1 ~ "Fox",
      msnbc == 1 ~ "MSNBC",
      entertainment == 1 ~ "Entertainment",
      TRUE ~ NA_character_
    ),
    wave2_choice = article_read_w2
  ) %>%
  filter(
    !is.na(wave1_treatment),
    !is.na(wave2_choice),
    wave2_choice %in% c("Fox", "MSNBC", "Entertainment")
  )

cat(sprintf("Analyzing %d respondents\n\n", nrow(analysis_data)))

# Counts for every (wave-1 treatment, wave-2 choice) pair, the size of each
# treatment group, and the choice's percentage within its treatment group.
flow_data <- analysis_data %>%
  count(wave1_treatment, wave2_choice, name = "freq") %>%
  add_count(wave1_treatment, wt = freq, name = "total_wave1") %>%
  mutate(pct_within_treatment = freq / total_wave1 * 100)



# Summary table: counts and percentages (one decimal) of wave-2 choices within
# each wave-1 treatment, most frequent choice first.
summary_table <- flow_data %>%
  dplyr::select(wave1_treatment, wave2_choice, freq, pct_within_treatment) %>%
  arrange(wave1_treatment, desc(pct_within_treatment)) %>%
  mutate(pct_within_treatment = round(pct_within_treatment, 1))

cat("Summary of Wave 2 Choices by Wave 1 Treatment:\n")
cat("=============================================\n\n")

for (treatment in c("Entertainment", "Fox", "MSNBC")) {
  cat(paste0("Wave 1 Treatment: ", treatment, "\n"))
  cat("-------------------\n")

  treatment_data <- summary_table %>%
    filter(wave1_treatment == treatment)

  total_n <- sum(treatment_data$freq)
  cat(sprintf("  Total respondents: %d\n", total_n))

  # seq_len() yields an empty sequence when a treatment has no rows, so no
  # spurious "NA" lines are printed (1:nrow() would iterate over 1 and 0).
  for (i in seq_len(nrow(treatment_data))) {
    cat(sprintf("    Chose %s in Wave 2: %d respondents (%.1f%%)\n",
                treatment_data$wave2_choice[i],
                treatment_data$freq[i],
                treatment_data$pct_within_treatment[i]))
  }
  cat("\n")
}

# Statistical test for independence
cat(
  "\nChi-square test of independence:\n",
  "================================\n",
  sep = ""
)

# Contingency table: wave-1 treatment (rows) by wave-2 choice (columns)
cont_table <- table(analysis_data$wave1_treatment, analysis_data$wave2_choice)

# Chi-square test on the contingency table; report statistic, df and p-value
chi_test <- chisq.test(cont_table)
cat(
  sprintf("Chi-square statistic: %.2f\n", chi_test$statistic),
  sprintf("Degrees of freedom: %d\n", chi_test$parameter),
  sprintf("P-value: %.4f\n", chi_test$p.value),
  sep = ""
)

# Calculate "stickiness": for each wave-1 treatment, the number and percentage
# of respondents whose wave-2 choice equals that treatment.
# Every treatment present in flow_data gets a row; a treatment where nobody
# stayed is reported with 0 rather than being dropped.
stickiness <- flow_data %>%
  group_by(wave1_treatment) %>%
  summarise(
    stayed_same = sum(freq[wave2_choice == wave1_treatment]),
    total = sum(freq),
    .groups = 'drop'
  ) %>%
  mutate(
    pct_stayed = stayed_same / total * 100
  )

cat("\n\n'Stickiness' - Respondents who chose same outlet in Wave 2:\n")
cat("========================================================\n")
# seq_len() keeps the loop from running when the table is empty.
for (i in seq_len(nrow(stickiness))) {
  cat(sprintf("%s: %.1f%% stayed with %s\n",
              stickiness$wave1_treatment[i],
              stickiness$pct_stayed[i],
              stickiness$wave1_treatment[i]))
}


# Stacked bar chart: one bar per wave-1 treatment, split by wave-2 choice, with
# the within-treatment percentage printed in the middle of each segment.
outlet_colours <- c(
  "Entertainment" = "#808080",
  "Fox" = "#A31F34",
  "MSNBC" = "#315485"
)

stacked_bar <- ggplot(
  flow_data,
  aes(x = wave1_treatment, y = pct_within_treatment, fill = wave2_choice)
) +
  geom_col(position = "stack") +
  geom_text(
    aes(label = sprintf("%.0f%%", pct_within_treatment)),
    position = position_stack(vjust = 0.5),
    size = 3.5,
    color = "white"
  ) +
  scale_fill_manual(values = outlet_colours) +
  # Values are already on a 0-100 scale, so only the "%" suffix is added.
  scale_y_continuous(labels = percent_format(scale = 1)) +
  labs(
    x = "Wave 1 Treatment (Forced Exposure)",
    y = "Percentage",
    fill = "Wave 2 Choice\n(Free Choice)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, face = "bold")
  )

ggsave("Output/figA3_wave2_choice_stacked_bar.pdf", stacked_bar, width = 8, height = 6)



